Load data
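The setup chunk is not shown in this document. For reference, a minimal sketch of the packages the code below relies on (my_colors is a custom palette assumed to be defined in that setup chunk):

### Packages assumed by this analysis (inferred from the calls below)
library(tidyverse)   # read_csv(), dplyr verbs, ggplot2
library(lmerTest)    # lmer() with Satterthwaite t-tests
library(ggridges)    # geom_density_ridges2()
library(viridis)     # scale_color_viridis()
library(ggrepel)     # geom_text_repel()
### broom is called via broom::tidy(), so it only needs to be installed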

### setwd("/Users/seantrott/Dropbox/UCSD/Research/Ambiguity/SSD/spanish_norms/src/analysis/")
### Read in all data
df_final = read_csv("../../data/processed/human/sawc_relatedness_full_critical_data.csv")
nrow(df_final)
## [1] 10639
length(unique(df_final$Participant))
## [1] 131

RQ1: Same vs. Different Sense

df_ratio = df_final %>% 
  group_by(Same_sense) %>%
  mutate(count_condition = n()) %>%    # total trials per condition
  ungroup() %>%
  group_by(Same_sense, Response, count_condition) %>%
  summarise(count_response = n()) %>%  # trials per response level within condition
  mutate(prop_response = count_response / count_condition)
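
As a quick sanity check (an addition, not part of the original analysis), the proportions should sum to 1 within each condition:

### Added sanity check: P(Response | Condition) sums to 1 per condition
df_ratio %>%
  group_by(Same_sense) %>%
  summarise(total_prop = sum(prop_response))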
              
df_ratio %>%         
  ggplot(aes(x = Response,
             y = prop_response)) +
  geom_bar(alpha = .6, stat = "identity") +
  theme_minimal() +
  labs(x = "Relatedness",
       y = "P(Response | Condition)") +
  facet_wrap(~Same_sense) +
  theme(text = element_text(size = 15),
        legend.position="none")

df_final %>%         
  ggplot(aes(x = Response)) +
  geom_bar(alpha = .6, stat = "count") +
  theme_minimal() +
  labs(x = "Relatedness",
       y = "Count") +
  facet_wrap(~Same_sense) +
  theme(text = element_text(size = 15),
        legend.position="none")

### Full model: fixed effect of Same_sense, with by-participant and by-list
### random slopes for Same_sense, plus random intercepts for Word
mod_full = lmer(data = df_final,
                Response ~ Same_sense +
                  (1 + Same_sense | Participant) + 
                  (1 + Same_sense | List) + (1 | Word),
                REML = FALSE)

### Reduced model: identical random-effects structure, minus the fixed effect
mod_reduced = lmer(data = df_final,
                Response ~ # Same_sense +
                  (1 + Same_sense | Participant) + 
                  (1 + Same_sense | List) + (1 | Word),
                REML = FALSE)

summary(mod_full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Response ~ Same_sense + (1 + Same_sense | Participant) + (1 +  
##     Same_sense | List) + (1 | Word)
##    Data: df_final
## 
##      AIC      BIC   logLik deviance df.resid 
##  34198.6  34271.3 -17089.3  34178.6    10629 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8340 -0.6554 -0.1521  0.6411  3.2534 
## 
## Random effects:
##  Groups      Name                 Variance Std.Dev. Corr 
##  Participant (Intercept)          0.13240  0.3639        
##              Same_senseSame Sense 0.13065  0.3614   -0.59
##  Word        (Intercept)          0.32042  0.5661        
##  List        (Intercept)          0.02791  0.1671        
##              Same_senseSame Sense 0.07990  0.2827   -0.55
##  Residual                         1.35993  1.1662        
## Number of obs: 10639, groups:  Participant, 131; Word, 102; List, 10
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           2.11454    0.08478 29.45271   24.94  < 2e-16 ***
## Same_senseSame Sense  2.24722    0.09838  9.67540   22.84 9.65e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## Sm_snsSmSns -0.406
anova(mod_full, mod_reduced)
## Data: df_final
## Models:
## mod_reduced: Response ~ (1 + Same_sense | Participant) + (1 + Same_sense | List) + (1 | Word)
## mod_full: Response ~ Same_sense + (1 + Same_sense | Participant) + (1 + Same_sense | List) + (1 | Word)
##             npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## mod_reduced    9 34236 34302 -17109    34218                         
## mod_full      10 34199 34271 -17089    34179 39.589  1  3.135e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

RQ2 + RQ5: Correlation with BETO

Load and process BETO data

### BETO distances
df_beto_distances = read_csv("../../data/processed/models/sawc-distances_model-bert-base-spanish-wwm-cased.csv")
nrow(df_beto_distances)
## [1] 10556
### basic items to get Tag
df_sawc_items = read_csv("../../data/raw/items/sawc_sentence_pairs.csv")

### merge together
df_merged = df_beto_distances %>%
  left_join(df_sawc_items)
nrow(df_merged)
## [1] 10556

Merge with SAW-C Norms

df_list_mean = df_final %>%
  group_by(List, Word, Tag) %>%
  summarise(mean_relatedness = mean(Response),
            count = n(),
            .groups = "drop")
nrow(df_list_mean)
## [1] 812
df_merged_beto = df_merged %>%
  inner_join(df_list_mean)
nrow(df_merged_beto)
## [1] 10556

RQ2: Correlation by layer

df_by_layer = df_merged_beto %>%
  group_by(Layer) %>%
  summarise(r = cor(mean_relatedness, Distance, method = "pearson"),
            r2 = r ** 2,
            rho = cor(mean_relatedness, Distance, method = "spearman"),
            count = n())

summary(df_by_layer$rho)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.58780 -0.57770 -0.56818 -0.48504 -0.47098  0.02152
df_by_layer %>%
  ggplot(aes(x = Layer,
             y = r)) +
  geom_line(size = 2,
            color = "cornflowerblue") +
  theme_minimal() +
  labs(x = "Layer (BETO)",
       y = "Pearson's r") +
  scale_x_continuous(breaks = c(0:13)) +
  theme(text = element_text(size = 15),
        legend.position="none")

df_by_layer %>%
  ggplot(aes(x = Layer,
             y = rho)) +
  geom_line(size = 2,
            color = "cornflowerblue") +
  theme_minimal() +
  labs(x = "Layer (BETO)",
       y = "Spearman's rho") +
  scale_x_continuous(breaks = c(0:13)) +
  theme(text = element_text(size = 15),
        legend.position="none")

df_by_layer %>%
  ggplot(aes(x = Layer,
             y = r2)) +
  geom_line(size = 2,
            color = "cornflowerblue") +
  theme_minimal() +
  labs(x = "Layer (BETO)",
       y = "R2") +
  scale_x_continuous(breaks = c(0:13)) +
  theme(text = element_text(size = 15),
        legend.position="none")

min(df_by_layer$rho)
## [1] -0.5878024

RQ5: Expected layer
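The "expected layer" summarizes where predictive power accrues: it is the average layer index, weighted by each layer's marginal change in R2. Read this way, the computation below is

expected_layer = sum_l [ l * delta_R2(l) ] / sum_l [ delta_R2(l) ],   where delta_R2(l) = R2(l) - R2(l-1),

with the first differences obtained via diff().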

df_by_layer <- df_by_layer %>%
  mutate(r2_delta = c(NA, diff(r2))) %>%     # marginal change in R2 at each layer
  mutate(weighted_layer = Layer * r2_delta)  # layer index weighted by that change

expected_layer = sum(df_by_layer$weighted_layer, na.rm = TRUE) / sum(df_by_layer$r2_delta, na.rm = TRUE)
expected_layer
## [1] 2.454195
df_by_layer %>%
  ggplot(aes(x = Layer,
             y = r2)) +
  geom_line(size = 2,
            color = "cornflowerblue",
            alpha = .8) +
  theme_minimal() +
  geom_vline(xintercept = expected_layer, size = 1.5, 
             linetype = "dashed", alpha = .7) +  # expected layer
  geom_vline(xintercept = 7, size = 1.5, 
             linetype = "dotted", alpha = .7) +  # best layer (max R2)
  scale_x_continuous(breaks = c(0:13)) +
  labs(x = "Layer (BETO)",
       y = "R2") +
  theme(text = element_text(size = 15),
        legend.position="none") 

RQ3: Cosine distance vs. Same/Different

Now we select the best-performing layer from BETO: layer 7, which maximizes R2 in the layer-wise analysis above.
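Rather than hard-coding the index, the winning layer can also be pulled out of df_by_layer directly (a small added sketch, equivalent to reading it off the plots above):

### Added sketch: identify the layer with the highest R2
best_layer = df_by_layer %>%
  filter(r2 == max(r2)) %>%
  pull(Layer)
best_layer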

df_beto_l7 = df_merged %>%
  filter(Layer == 7) %>%  # the best-performing layer identified above
  select(-Same_sense)     # avoid clashing with Same_sense in df_final
nrow(df_beto_l7)
## [1] 812
df_experimental_with_beto = df_final %>%
  left_join(df_beto_l7)
nrow(df_experimental_with_beto)
## [1] 10639
### Full model (both predictors); two nested reductions follow
mod_full = lmer(data = df_experimental_with_beto,
                Response ~ Same_sense + Distance +
                  (1 + Same_sense + Distance | Participant) + 
                  (1 | List) + (1 | Word),
                REML = FALSE)

mod_reduced = lmer(data = df_experimental_with_beto,
                Response ~ Distance + # Same_sense +
                  (1 + Same_sense + Distance | Participant) + 
                  (1 | List) + (1 | Word),
                REML = FALSE)

mod_just_same = lmer(data = df_experimental_with_beto,
                Response ~ Same_sense + # Distance
                  (1 + Same_sense + Distance | Participant) + 
                  (1 | List) + (1 | Word),
                REML = FALSE)

summary(mod_full)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Response ~ Same_sense + Distance + (1 + Same_sense + Distance |  
##     Participant) + (1 | List) + (1 | Word)
##    Data: df_experimental_with_beto
## 
##      AIC      BIC   logLik deviance df.resid 
##  33795.9  33883.2 -16886.0  33771.9    10627 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0633 -0.6341 -0.1164  0.6223  3.5636 
## 
## Random effects:
##  Groups      Name                 Variance Std.Dev. Corr       
##  Participant (Intercept)          0.23178  0.4814              
##              Same_senseSame Sense 0.19301  0.4393   -0.51      
##              Distance             0.87600  0.9359   -0.69 -0.15
##  Word        (Intercept)          0.28323  0.5322              
##  List        (Intercept)          0.01825  0.1351              
##  Residual                         1.30359  1.1417              
## Number of obs: 10639, groups:  Participant, 131; Word, 102; List, 10
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            2.91931    0.09022  74.77499   32.36   <2e-16 ***
## Same_senseSame Sense   1.87369    0.04923 144.30838   38.06   <2e-16 ***
## Distance              -4.26971    0.22414 306.55782  -19.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Sm_sSS
## Sm_snsSmSns -0.389       
## Distance    -0.523  0.300
anova(mod_full, mod_reduced)
## Data: df_experimental_with_beto
## Models:
## mod_reduced: Response ~ Distance + (1 + Same_sense + Distance | Participant) + (1 | List) + (1 | Word)
## mod_full: Response ~ Same_sense + Distance + (1 + Same_sense + Distance | Participant) + (1 | List) + (1 | Word)
##             npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## mod_reduced   11 34125 34205 -17052    34103                         
## mod_full      12 33796 33883 -16886    33772 331.55  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod_full, mod_just_same)
## Data: df_experimental_with_beto
## Models:
## mod_just_same: Response ~ Same_sense + (1 + Same_sense + Distance | Participant) + (1 | List) + (1 | Word)
## mod_full: Response ~ Same_sense + Distance + (1 + Same_sense + Distance | Participant) + (1 | List) + (1 | Word)
##               npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## mod_just_same   11 34002 34082 -16990    33980                         
## mod_full        12 33796 33883 -16886    33772 208.27  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Visualize
df_experimental_with_beto %>%
  mutate(distance_binned = ntile(Distance, 20)) %>%
  group_by(Same_sense, distance_binned) %>%
  summarize(
    mean_relatedness = mean(Response),
    sd_relatedness = sd(Response),
    count = n(),
    se_relatedness = sd_relatedness / sqrt(count)
  ) %>%
  ggplot(aes(x = distance_binned, 
             y = mean_relatedness, 
             color = Same_sense, 
             fill = Same_sense)) +
  geom_line(size = 1.5) +
  geom_ribbon(aes(ymin = mean_relatedness - se_relatedness, 
                  ymax = mean_relatedness + se_relatedness), 
              alpha = 0.8,
              color = NA) +
  labs(x = "BETO Cosine Distance (Binned)",
       y = "Relatedness",
       color = "Same Sense",
       fill = "Same Sense") +
  theme_minimal() +
  scale_fill_manual(values = my_colors)  +
  scale_color_manual(values = my_colors)  +
  theme(text = element_text(size = 15),
        legend.position="bottom")

RQ4: BETO vs. inter-annotator agreement

### First, get group means
df_list_mean = df_experimental_with_beto %>%
  group_by(List, Word, Tag) %>%
  summarise(mean_relatedness = mean(Response),
            count = n(),
            distance = mean(Distance),
            .groups = "drop")
nrow(df_list_mean)
## [1] 812
### Get BETO correlation (absolute value, since distance is negatively correlated with relatedness)
BETO_COR = abs(cor(df_list_mean$mean_relatedness,
               df_list_mean$distance, method = "spearman"))

### Now, iterate through ppts
ppts = unique(df_final$Participant)
df_r = data.frame()

for (ppt in ppts) {
  # Subset df_final for the current participant
  individual_data <- df_final %>%
    filter(Participant == ppt) %>%
    select(List, Word, Tag, Response)  # keep only the columns needed for the join

  # Merge individual data with mean data
  merged_data <- inner_join(individual_data, df_list_mean, by = c("List", "Word", "Tag"))
  
  test = cor.test(merged_data$Response,
                  merged_data$mean_relatedness,
                 method = "spearman")
  
  df_test = broom::tidy(test)
  df_test$ppt = ppt
  df_test$List = unique(individual_data$List)
  df_r = rbind(df_r, df_test)

}
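
As an aside, the same per-participant correlations can be computed without growing df_r one rbind at a time. A sketch using purrr (not the original implementation, but it yields the same columns):

### Added sketch: purrr-based equivalent of the loop above
df_r_alt = purrr::map_dfr(ppts, function(ppt) {
  individual_data = df_final %>%
    filter(Participant == ppt) %>%
    select(List, Word, Tag, Response)
  merged_data = inner_join(individual_data, df_list_mean,
                           by = c("List", "Word", "Tag"))
  broom::tidy(cor.test(merged_data$Response, merged_data$mean_relatedness,
                       method = "spearman")) %>%
    mutate(ppt = ppt, List = unique(individual_data$List))
})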

### Visualization
df_r %>%
  ggplot(aes(x = estimate)) +
  geom_histogram(alpha = .6) +
  scale_x_continuous(limits = c(0, 1)) +
  theme_minimal() +
  geom_vline(xintercept = BETO_COR, size = 1.5, 
             linetype = "dashed", alpha = .7) +
  labs(x = "Leave-one-out Correlation") +
  theme(text = element_text(size = 15),
        legend.position="none")

summary(df_r$estimate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3946  0.7480  0.7962  0.7725  0.8308  0.8834
### What proportion of inter-annotator agreement scores are larger?
prop_larger = df_r %>%
  mutate(larger = estimate >= BETO_COR) %>%
  summarise(mean(larger))
prop_larger
## # A tibble: 1 × 1
##   `mean(larger)`
##            <dbl>
## 1          0.947

RQ6: Layer vs. same/different

The analysis itself is implemented in Python; here we only reproduce the visualization.

# Calculate mean and SE of Distance
df_summary <- df_beto_distances %>%
  group_by(Layer, Same_sense) %>%
  summarise(
    mean_Distance = mean(Distance),
    sd_Distance = sd(Distance),
    count = n(),
    se_Distance = sd_Distance / sqrt(count),
    .groups = 'drop'  # Drop the automatic grouping by dplyr
  )

df_summary %>%
  ggplot(aes(x = Layer, 
             y = mean_Distance, 
             color = Same_sense, 
             fill = Same_sense)) +
  geom_line(size = 1.5) +
  geom_ribbon(aes(ymin = mean_Distance - 2 * se_Distance, 
                  ymax = mean_Distance + 2 * se_Distance), 
              alpha = 0.8,
              color = NA) +
  labs(x = "Layer (BETO)",
       y = "Mean Distance",
       color = "Same Sense",
       fill = "Same Sense") +
  scale_x_continuous(breaks = c(0:13)) +
  theme_minimal() +
  # scale_fill_viridis(option = "mako", discrete=TRUE) +
  # scale_color_viridis(option = "mako", discrete=TRUE) +
  scale_fill_manual(values = my_colors)  +
  scale_color_manual(values = my_colors)  +
  theme(text = element_text(size = 15),
        legend.position="bottom")

Additional visualizations

df_item_means = df_final %>%
  group_by(List, Word, Same_sense, Sentence_1, Sentence_2,
           Sense_id_s1, Sense_id_s2, Gender_s1, Gender_s2) %>%
  summarise(mean_relatedness = mean(Response),
            sd_relatedness = sd(Response),
            median_relatedness = median(Response),
            count = n())


df_item_means %>%         
  ggplot(aes(x = mean_relatedness)) +
  geom_histogram(alpha = .6, bins = 10) +
  theme_minimal() +
  labs(x = "Mean Relatedness",
       y = "Count") +
  facet_wrap(~Same_sense) +
  theme(text = element_text(size = 15),
        legend.position="none")

df_item_means %>%
  ggplot(aes(x = mean_relatedness,
             y = Same_sense,
             fill = Same_sense)) +
  geom_density_ridges2(aes(height = after_stat(density)), 
                       color = gray(0.25), 
                       alpha = .7, 
                       scale = .85, 
                       size = 0,
                       stat = "density") +
  labs(x = "Mean Relatedness",
       y = "",
       fill = "") +
  theme_minimal()+
  scale_fill_manual(values = my_colors)  +
  theme(text = element_text(size = 15),
        legend.position="none") 

Comparing all models together

df_beto = read_csv("../../data/processed/models/sawc-distances_model-bert-base-spanish-wwm-cased.csv") %>%
  mutate(Model = "BETO-cased",
         Multilingual = "Monolingual")
df_xlm = read_csv("../../data/processed/models/sawc-distances_model-xlm-roberta-base.csv") %>%
  mutate(Model = "XLM-RoBERTa",
         Multilingual = "Multilingual")

df_mb = read_csv("../../data/processed/models/sawc-distances_model-bert-base-multilingual-cased.csv") %>%
  mutate(Model = "Multilingual BERT",
         Multilingual = "Multilingual")

df_db = read_csv("../../data/processed/models/sawc-distances_model-distilbert-base-spanish-uncased.csv") %>%
  mutate(Model = "DistilBETO",
         Multilingual = "Monolingual")

df_ab = read_csv("../../data/processed/models/sawc-distances_model-albert-base-spanish.csv") %>%
  mutate(Model = "ALBERT-base",
         Multilingual = "Monolingual")

df_at = read_csv("../../data/processed/models/sawc-distances_model-albert-tiny-spanish.csv") %>%
  mutate(Model = "ALBERT-tiny",
         Multilingual = "Monolingual")

df_axl = read_csv("../../data/processed/models/sawc-distances_model-albert-xlarge-spanish.csv") %>%
  mutate(Model = "ALBERT-xlarge",
         Multilingual = "Monolingual")

df_al = read_csv("../../data/processed/models/sawc-distances_model-albert-large-spanish.csv") %>%
  mutate(Model = "ALBERT-large",
         Multilingual = "Monolingual")

df_axxl = read_csv("../../data/processed/models/sawc-distances_model-albert-xxlarge-spanish.csv") %>%
  mutate(Model = "ALBERT-xxlarge",
         Multilingual = "Monolingual")

df_rb = read_csv("../../data/processed/models/sawc-distances_model-roberta-base-bne.csv") %>%
  mutate(Model = "RoBERTa-base",
         Multilingual = "Monolingual")

df_rl = read_csv("../../data/processed/models/sawc-distances_model-roberta-large-bne.csv") %>%
  mutate(Model = "RoBERTa-large",
         Multilingual = "Monolingual")

df_beto_uncased = read_csv("../../data/processed/models/sawc-distances_model-bert-base-spanish-wwm-uncased.csv") %>%
  mutate(Model = "BETO-uncased",
         Multilingual = "Monolingual")

df_all = df_beto %>%
  bind_rows(df_xlm) %>%
  bind_rows(df_db) %>%
  bind_rows(df_ab) %>%
  bind_rows(df_at) %>%
  bind_rows(df_mb) %>%
  bind_rows(df_axl) %>%
  bind_rows(df_al) %>%
  bind_rows(df_axxl) %>%
  bind_rows(df_rb) %>%
  bind_rows(df_rl) %>%
  bind_rows(df_beto_uncased)

nrow(df_all)
## [1] 144536
### basic items to get Tag
df_sawc_items = read_csv("../../data/raw/items/sawc_sentence_pairs.csv")

### merge together
df_merged = df_all %>%
  left_join(df_sawc_items)

df_list_mean = df_final %>%
  group_by(List, Word, Tag) %>%
  summarise(mean_relatedness = mean(Response),
            count = n(),
            .groups = "drop")
nrow(df_list_mean)
## [1] 812
df_merged = df_merged %>%
  inner_join(df_list_mean)
nrow(df_merged)
## [1] 144536

R2 by layer, by model

df_by_layer = df_merged %>%
  group_by(Model, Multilingual, Layer, n_params) %>%
  summarise(r = cor(mean_relatedness, Distance, method = "pearson"),
            r2 = r ** 2,
            rho = cor(mean_relatedness, Distance, method = "spearman"),
            count = n())

max(df_by_layer$r2)
## [1] 0.3321961
df_by_layer %>%
  filter(r2 == max(df_by_layer$r2))
## # A tibble: 1 × 8
## # Groups:   Model, Multilingual, Layer [1]
##   Model      Multilingual Layer  n_params      r    r2    rho count
##   <chr>      <chr>        <dbl>     <dbl>  <dbl> <dbl>  <dbl> <int>
## 1 BETO-cased Monolingual      7 109850880 -0.576 0.332 -0.587   812
df_by_layer %>%
  filter(!Model %in% c("ALBERT-tiny", "ALBERT-xlarge", "DistilBETO",
                       "ALBERT-large", "RoBERTa-large")) %>%
  ggplot(aes(x = Layer,
             y = r2,
             color = Model,
             linetype = Model)) +
  geom_line(size = 2) +
  theme_minimal() +
  labs(x = "Layer",
       y = "R2",
       color = "",
       linetype = "") +
  scale_x_continuous(breaks = c(0:13)) +
  scale_color_viridis(option = "mako", discrete=TRUE) +
  theme(text = element_text(size = 12),
        legend.position="bottom")

df_by_layer %>%
  filter(!Model %in% c("ALBERT-tiny", "ALBERT-xlarge", "DistilBETO",
                       "ALBERT-large", "RoBERTa-large")) %>%
  ggplot(aes(x = Layer,
             y = r2,
             color = Model)) +
  geom_line(size = 2) +
  theme_minimal() +
  labs(x = "Layer",
       y = "R2",
       color = "") +
  scale_x_continuous(breaks = c(0:13)) +
  scale_color_viridis(option = "mako", discrete=TRUE) +
  theme(text = element_text(size = 12),
        legend.position="bottom")

df_by_layer %>%
  filter(Model %in% c("ALBERT-xlarge", 
                      "ALBERT-large",
                      "RoBERTa-large")) %>%
  ggplot(aes(x = Layer,
             y = r2,
             color = Model)) +
  geom_line(size = 2) +
  theme_minimal() +
  labs(x = "Layer",
       y = "R2",
       color = "") +
  scale_color_viridis(option = "mako", discrete=TRUE) +
  theme(text = element_text(size = 15),
        legend.position="bottom")

Comparison by model parameters

The BETO/ALBERT/DistilBETO family of models provides a nice within-family comparison of the effect of parameter count, since these models were trained on the same datasets.

df_max_r2 = df_by_layer %>%
  group_by(Model, n_params, Multilingual) %>%
  summarise(max_r2 = max(r2))
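
For reference, the within-family subset can be pulled straight from this summary (an added sketch, using the model labels defined above):

### Added sketch: models trained on the same Spanish corpus, ordered by size
df_max_r2 %>%
  filter(Model %in% c("ALBERT-tiny", "ALBERT-base", "ALBERT-large",
                      "ALBERT-xlarge", "ALBERT-xxlarge",
                      "DistilBETO", "BETO-cased", "BETO-uncased")) %>%
  arrange(n_params)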

df_max_r2 %>%
  ggplot(aes(x = n_params,
             y = max_r2,
             color = Multilingual,
             shape = Multilingual)) +
  geom_point(size = 6,
             alpha = .7) +
  geom_hline(yintercept = mean(df_r$estimate)**2, ### mean human inter-annotator rho, squared onto the R2 scale
             linetype = "dotted", color = "red",
             size = 1.2, alpha = .5) +
  geom_text_repel(aes(label=Model), size=3) +
  scale_x_log10() +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "Parameters",
       y = "Maximum R2",
       color = "",
       shape = "") +
  theme_minimal() +
  # guides(color="none") +
  # scale_color_viridis(option = "mako", discrete=TRUE) +
  scale_color_manual(values = my_colors)  +
  theme(text = element_text(size = 15),
        legend.position="bottom")

Same vs. Different Sense residuals

# Fit models and calculate residuals for each LLM
models <- by(df_merged, df_merged$Model, function(subdata) {
  model <- lm(mean_relatedness ~ Distance * Layer, data = subdata)
  subdata$residuals <- residuals(model)
  return(subdata)
})

# Combine the results back into a single dataframe
results <- do.call(rbind, models)

# Modify Same_sense
results = results %>%
  mutate(Same_sense = case_when(
    Same_sense == TRUE ~ "Same Sense",
    Same_sense == FALSE ~ "Different Sense"
  ))
  



results %>%
  ggplot(aes(x = residuals,
             fill = Same_sense)) +
  geom_density(alpha = .7, size = 0) +
  geom_vline(xintercept = 0, size = .6, linetype = "dashed") +
  theme_minimal() +
  # scale_fill_viridis(option = "mako", discrete=TRUE) +
  theme(text = element_text(size = 15),
        legend.position="bottom") +
  scale_fill_manual(values = my_colors)  +
  labs(x = "Residuals (Relatedness ~ Distance * Layer)",
       fill = "") +
  facet_wrap(~Model)